Seperate genes based on different patterns

Input for run_select_pattern.sh: RSEM result (GeneMat_HFD_Lingon_LDF.Ebseqresults/GeneMat_HFD_Lingon_LDF.Ebseqresults_FDR_0.05.tab)

Output: Gene list with EnsemblID and GeneSymbol, classified as diffent patterns (Output folder: Matrix/pattern)

#seperate genes based on different patterns
scr/run_select_pattern.sh
#full list of expressed genes
cut -f 1 Matrix/GeneMat_HFD_Lingon_LDF.Ebseqresults | sed $'s/_/\t/' | sed $'s/"//g'|tail -n +2 > Matrix/full_gene_list.csv

GO analysis

Load data

pattern2.FDR0.05 <- read.table("Matrix/pattern/pattern2_FDR0.05.csv", header = TRUE)
pattern3.FDR0.05 <- read.table("Matrix/pattern/pattern3_FDR0.05.csv", header = TRUE)
pattern4.FDR0.05 <- read.table("Matrix/pattern/pattern4_FDR0.05.csv", header = TRUE)
full_gene <- read.table("Matrix/full_gene_list.csv", header = FALSE)
names(full_gene) <- c("EnsemblID","GeneSymbol")
#load packages
library(clusterProfiler)
library(org.Mm.eg.db)
library(ggplot2)
library(gprofiler2)

RSEM result summary

Summarize the number of genes classified to each pattern (FDR cutoff = 0.05)

pattern_table <- read.table("Matrix/GeneMat_HFD_Lingon_LDF.Ebseqresults.pattern",header = TRUE)
names(pattern_table) <- c("HDF","Lingon","LFD")
RSEM_summary <-data.frame(c(0,155,40,123,0,318),
                 row.names = c("pattern1.FDR0.05","pattern2.FDR0.05","pattern3.FDR0.05","pattern4.FDR0.05","pattern5.FDR0.05","Sum"))
names(RSEM_summary) <- c("number of genes")
knitr::kable(pattern_table)
HDF Lingon LFD
Pattern1 1 1 1
Pattern2 1 1 2
Pattern3 1 2 1
Pattern4 1 2 2
Pattern5 1 2 3
knitr::kable(RSEM_summary)
number of genes
pattern1.FDR0.05 0
pattern2.FDR0.05 155
pattern3.FDR0.05 40
pattern4.FDR0.05 123
pattern5.FDR0.05 0
Sum 318

For example:

Genes belonging to Pattern 1 equals no differential change between any of the experiments

Genes belonging to Pattern 2 are differentially expressed in LFD compared to HFD & Lingon

Genes in Pattern 3 are DE in Lingon compared to the other experiments

Genes in Pattern 4 have similar expression in Lingon and LFD but are differentially expressed in HFD

GO analysis using clusterProfiler

(clusterProfiler v3.16.1 For help: https://guangchuangyu.github.io/software/clusterProfiler)

ego<- function(pattern_data){
   ego_pattern <- enrichGO(gene = pattern_data[,1], 
                           universe = full_gene[,1],
                           OrgDb = org.Mm.eg.db, 
                           keyType = "ENSEMBL", 
                           ont = 'ALL',       
                           pAdjustMethod = "BH",
                           pvalueCutoff  = 0.01,
                           qvalueCutoff  = 0.05,
                           readable      = TRUE)

   str1 <- "GO_results/clusterProfiler/"
   str2 <- "_GO_enrich.csv"
   file_name <- paste(str1, deparse(substitute(pattern_data)), str2, sep = "")
   write.csv(as.data.frame(ego_pattern),file_name,row.names = F)
   return(ego_pattern)
}

GO analysis for gene assigned to pattern2.FDR0.05

ego_pattern2.FDR0.05 <- ego(pattern2.FDR0.05)
barplot(ego_pattern2.FDR0.05,showCategory=20,drop=T)

dotplot(ego_pattern2.FDR0.05,showCategory=20)

heatplot(ego_pattern2.FDR0.05)

GO analysis for gene assigned to pattern3.FDR0.05

ego_pattern3.FDR0.05 <- ego(pattern3.FDR0.05)
barplot(ego_pattern3.FDR0.05,showCategory=20,drop=T)

dotplot(ego_pattern3.FDR0.05,showCategory=20)

heatplot(ego_pattern3.FDR0.05)

GO analysis for gene assigned to pattern4.FDR0.05

ego_pattern4.FDR0.05 <- ego(pattern4.FDR0.05)
barplot(ego_pattern4.FDR0.05,showCategory=20,drop=T)

dotplot(ego_pattern4.FDR0.05,showCategory=20)

heatplot(ego_pattern4.FDR0.05)

GO analysis using gprofiler

(gprofiler2 version 0.2.0 For help:https://cran.r-project.org/web/packages/gprofiler2/vignettes/gprofiler2.html)

GO analysis for gene assigned to pattern2.FDR0.05

gost_pattern2 <- gost(pattern2.FDR0.05[,1], organism ='mmusculus', sources = 'GO', significant = TRUE)
gostplot(gost_pattern2)

GO analysis for gene assigned to pattern3.FDR0.05

gost_pattern3 <- gost(pattern3.FDR0.05[,1], organism ='mmusculus', sources = 'GO', significant = TRUE)
gostplot(gost_pattern3)

GO analysis for gene assigned to pattern4.FDR0.05

gost_pattern4 <- gost(pattern4.FDR0.05[,1], organism ='mmusculus', sources = 'GO', significant = TRUE)
gostplot(gost_pattern4)

GO analysis output

clusterProfiler

clusterProfiler GO analysis output

clusterProfiler GO analysis output files are in the ./GO_results/clusterProfiler